tidy::nest() function and and data frames with list-columns:log10(gdpPercap) on year for each country.library(gapminder)
gap_nested <- gapminder %>%
group_by(country, continent) %>%
nest()
gap_nested
## # A tibble: 142 × 3
## # Groups: country, continent [142]
## country continent data
## <fct> <fct> <list>
## 1 Afghanistan Asia <tibble [12 × 4]>
## 2 Albania Europe <tibble [12 × 4]>
## 3 Algeria Africa <tibble [12 × 4]>
## 4 Angola Africa <tibble [12 × 4]>
## 5 Argentina Americas <tibble [12 × 4]>
## 6 Australia Oceania <tibble [12 × 4]>
## 7 Austria Europe <tibble [12 × 4]>
## 8 Bahrain Asia <tibble [12 × 4]>
## 9 Bangladesh Asia <tibble [12 × 4]>
## 10 Belgium Europe <tibble [12 × 4]>
## # … with 132 more rows
gap_nested_lm <- gap_nested %>%
mutate(model = map(data, ~lm(log10(gdpPercap) ~ year, data = .)))
gap_nested_lm
## # A tibble: 142 × 4
## # Groups: country, continent [142]
## country continent data model
## <fct> <fct> <list> <list>
## 1 Afghanistan Asia <tibble [12 × 4]> <lm>
## 2 Albania Europe <tibble [12 × 4]> <lm>
## 3 Algeria Africa <tibble [12 × 4]> <lm>
## 4 Angola Africa <tibble [12 × 4]> <lm>
## 5 Argentina Americas <tibble [12 × 4]> <lm>
## 6 Australia Oceania <tibble [12 × 4]> <lm>
## 7 Austria Europe <tibble [12 × 4]> <lm>
## 8 Bahrain Asia <tibble [12 × 4]> <lm>
## 9 Bangladesh Asia <tibble [12 × 4]> <lm>
## 10 Belgium Europe <tibble [12 × 4]> <lm>
## # … with 132 more rows
gap_nested_lm_resid <- gap_nested_lm %>%
mutate(resid = map2(data, model, add_residuals)) %>%
unnest(resid)
gap_nested_lm_resid
## # A tibble: 1,704 × 9
## # Groups: country, continent [142]
## country continent data model year lifeExp pop gdpPercap resid
## <fct> <fct> <list> <list> <int> <dbl> <int> <dbl> <dbl>
## 1 Afghanistan Asia <tibble> <lm> 1952 28.8 8.43e6 779. -0.0202
## 2 Afghanistan Asia <tibble> <lm> 1957 30.3 9.24e6 821. 0.00433
## 3 Afghanistan Asia <tibble> <lm> 1962 32.0 1.03e7 853. 0.0231
## 4 Afghanistan Asia <tibble> <lm> 1967 34.0 1.15e7 836. 0.0164
## 5 Afghanistan Asia <tibble> <lm> 1972 36.1 1.31e7 740. -0.0347
## 6 Afghanistan Asia <tibble> <lm> 1977 38.4 1.49e7 786. -0.00641
## 7 Afghanistan Asia <tibble> <lm> 1982 39.9 1.29e7 978. 0.0905
## 8 Afghanistan Asia <tibble> <lm> 1987 40.8 1.39e7 852. 0.0328
## 9 Afghanistan Asia <tibble> <lm> 1992 41.7 1.63e7 649. -0.0834
## 10 Afghanistan Asia <tibble> <lm> 1997 41.8 2.22e7 635. -0.0909
## # … with 1,694 more rows
gap_nested_lm_resid %>%
ggplot(aes(year, resid)) +
geom_line(alpha = 1/3, aes(group = country)) +
geom_smooth(color = "blue") +
labs(
title = "Residual Graph for Each Country"
) +
theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
gap_nested_lm_resid %>%
ggplot(aes(year, resid)) +
geom_line(alpha = 1/3, aes(group = country)) +
geom_smooth(color = "blue") +
facet_wrap(~continent) +
labs(
title = "Residual Graph for Each Continent"
) +
theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
gap_nested_lm_tidy <- gap_nested_lm %>%
mutate(tidy = map(model, tidy))
gap_nested_lm_tidy %>%
mutate(tidy = map(tidy, ~filter(., term == "year"))) %>%
unnest(tidy) %>%
ggplot(aes(continent, estimate)) +
geom_boxplot() +
geom_beeswarm() +
labs(
title = "Continent-wise Beeswarmplot for Slope Coefficient Value"
) +
theme(plot.title = element_text(hjust = 0.5))
gap_nested_lm_tidy %>%
mutate(tidy = map(tidy, ~filter(., term == "year"))) %>%
unnest(tidy) %>%
ggplot(aes(continent, statistic)) +
geom_boxplot() +
geom_beeswarm() +
labs(
title = "Continent-wise Beeswarmplot for t-statistic Value"
) +
theme(plot.title = element_text(hjust = 0.5))
According to the graph, we can see that
negativeCountries <- gap_nested_lm_tidy %>%
mutate(tidy = map(tidy, ~filter(., term == "year"))) %>%
unnest(tidy) %>%
filter(estimate < 0 & p.value < 0.05) %>%
ungroup() %>%
select(country, estimate, p.value)
negativeCountries
## # A tibble: 8 × 3
## country estimate p.value
## <fct> <dbl> <dbl>
## 1 Central African Republic -0.00461 0.0000171
## 2 Congo, Dem. Rep. -0.0106 0.000139
## 3 Djibouti -0.00354 0.0103
## 4 Haiti -0.00277 0.0160
## 5 Kuwait -0.0105 0.00137
## 6 Madagascar -0.00485 0.000337
## 7 Niger -0.00394 0.00591
## 8 Somalia -0.00316 0.00339
log10(gdpPercap) for the countries identified in step 4.gapminder %>%
filter(country %in% negativeCountries$country) %>%
group_by(country, continent) %>%
ggplot(aes(x = year, y = log10(gdpPercap), color = country)) +
geom_line()
mtcars_cv <- mtcars %>%
crossv_kfold(k = 8)
mtcars_cv
## # A tibble: 8 × 3
## train test .id
## <named list> <named list> <chr>
## 1 <resample [28 x 11]> <resample [4 x 11]> 1
## 2 <resample [28 x 11]> <resample [4 x 11]> 2
## 3 <resample [28 x 11]> <resample [4 x 11]> 3
## 4 <resample [28 x 11]> <resample [4 x 11]> 4
## 5 <resample [28 x 11]> <resample [4 x 11]> 5
## 6 <resample [28 x 11]> <resample [4 x 11]> 6
## 7 <resample [28 x 11]> <resample [4 x 11]> 7
## 8 <resample [28 x 11]> <resample [4 x 11]> 8
mtcars_lm <- mtcars %>%
lm(mpg ~ wt, .)
mtcars_lm
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 37.285 -5.344
mtcars_nls <- mtcars %>%
nls(mpg ~ k / wt + b, ., start = list(k = 1, b = 0))
mtcars_nls
## Nonlinear regression model
## model: mpg ~ k/wt + b
## data: .
## k b
## 45.829 4.386
## residual sum-of-squares: 230.9
##
## Number of iterations to convergence: 1
## Achieved convergence tolerance: 2.877e-08
lm_rmse_mean <- mtcars_cv %>%
mutate(lm = map(train, ~lm(mpg ~ wt, data = .))) %$%
map2_dbl(lm, test, rmse) %>%
mean()
nls_rmse_mean <- mtcars_cv %>%
mutate(
nls = map(train, ~nls(mpg ~ k/wt+b, data = .[[1]], start = list(k = 1, b = 0)))
) %$%
map2_dbl(nls, test, rmse) %>%
mean()
print(paste("Linear model RSME mean: ", lm_rmse_mean, sep = ''))
## [1] "Linear model RSME mean: 2.80450015327895"
print(paste('Non-linear model RSME mean: ', nls_rmse_mean, sep = ''))
## [1] "Non-linear model RSME mean: 7.88101654971769"
Comparing the mean RSME of both models, we can see that linear model’s mean RSME is less than non-linear model’s one. This means that the performance of the linear model is better than non-linear model.
nodes <- read.csv(
"Dataset1-Media-Example-NODES.csv",
header = T,
as.is = T
) %>%
as_tibble()
nodes
## # A tibble: 17 × 5
## id media media.type type.label audience.size
## <chr> <chr> <int> <chr> <int>
## 1 s01 NY Times 1 Newspaper 20
## 2 s02 Washington Post 1 Newspaper 25
## 3 s03 Wall Street Journal 1 Newspaper 30
## 4 s04 USA Today 1 Newspaper 32
## 5 s05 LA Times 1 Newspaper 20
## 6 s06 New York Post 1 Newspaper 50
## 7 s07 CNN 2 TV 56
## 8 s08 MSNBC 2 TV 34
## 9 s09 FOX News 2 TV 60
## 10 s10 ABC 2 TV 23
## 11 s11 BBC 2 TV 34
## 12 s12 Yahoo News 3 Online 33
## 13 s13 Google News 3 Online 23
## 14 s14 Reuters.com 3 Online 12
## 15 s15 NYTimes.com 3 Online 24
## 16 s16 WashingtonPost.com 3 Online 28
## 17 s17 AOL.com 3 Online 33
links <- read.csv(
"Dataset1-Media-Example-EDGES.csv",
header = T,
as.is = T
) %>%
as_tibble()
links
## # A tibble: 49 × 4
## from to type weight
## <chr> <chr> <chr> <int>
## 1 s01 s02 hyperlink 22
## 2 s01 s03 hyperlink 22
## 3 s01 s04 hyperlink 21
## 4 s01 s15 mention 20
## 5 s02 s01 hyperlink 23
## 6 s02 s03 hyperlink 21
## 7 s02 s09 hyperlink 1
## 8 s02 s10 hyperlink 5
## 9 s03 s01 hyperlink 21
## 10 s03 s04 hyperlink 22
## # … with 39 more rows
net_igraph <- graph_from_data_frame(d = links, vertices = nodes, directed = T)
net_igraph
## IGRAPH a85b62b DNW- 17 49 --
## + attr: name (v/c), media (v/c), media.type (v/n), type.label (v/c),
## | audience.size (v/n), type (e/c), weight (e/n)
## + edges from a85b62b (vertex names):
## [1] s01->s02 s01->s03 s01->s04 s01->s15 s02->s01 s02->s03 s02->s09 s02->s10
## [9] s03->s01 s03->s04 s03->s05 s03->s08 s03->s10 s03->s11 s03->s12 s04->s03
## [17] s04->s06 s04->s11 s04->s12 s04->s17 s05->s01 s05->s02 s05->s09 s05->s15
## [25] s06->s06 s06->s16 s06->s17 s07->s03 s07->s08 s07->s10 s07->s14 s08->s03
## [33] s08->s07 s08->s09 s09->s10 s10->s03 s12->s06 s12->s13 s12->s14 s13->s12
## [41] s13->s17 s14->s11 s14->s13 s15->s01 s15->s04 s15->s06 s16->s06 s16->s17
## [49] s17->s04
plot(net_igraph)
net_network <- asNetwork(net_igraph)
net_network
## Network attributes:
## vertices = 17
## directed = TRUE
## hyper = FALSE
## loops = TRUE
## multiple = FALSE
## bipartite = FALSE
## total edges= 49
## missing edges= 0
## non-missing edges= 49
##
## Vertex attribute names:
## audience.size media media.type type.label vertex.names
##
## Edge attribute names:
## type weight
net_network %>%
ggnet2(size = "audience.size", color = "media.type", label = T)
net_network %>%
degree(gmode = "graph") %>%
mean()
## [1] 2.823529
net_network %>%
network.density()
## [1] 0.1695502
url <- "https://www.worldometers.info/world-population/population-by-country/"
country_population <- url %>%
read_html() %>%
html_nodes("table") %>%
html_table(fill = TRUE) %>%
.[[1]]
country_population
## # A tibble: 235 × 12
## `#` `Country (or dependency)` `Population (2…` `Yearly Change` `Net Change`
## <int> <chr> <chr> <chr> <chr>
## 1 1 China 1,439,323,776 0.39 % 5,540,090
## 2 2 India 1,380,004,385 0.99 % 13,586,631
## 3 3 United States 331,002,651 0.59 % 1,937,734
## 4 4 Indonesia 273,523,615 1.07 % 2,898,047
## 5 5 Pakistan 220,892,340 2.00 % 4,327,022
## 6 6 Brazil 212,559,417 0.72 % 1,509,890
## 7 7 Nigeria 206,139,589 2.58 % 5,175,990
## 8 8 Bangladesh 164,689,383 1.01 % 1,643,222
## 9 9 Russia 145,934,462 0.04 % 62,206
## 10 10 Mexico 128,932,753 1.06 % 1,357,224
## # … with 225 more rows, and 7 more variables: `Density (P/Km²)` <chr>,
## # `Land Area (Km²)` <chr>, `Migrants (net)` <chr>, `Fert. Rate` <chr>,
## # `Med. Age` <chr>, `Urban Pop %` <chr>, `World Share` <chr>
country_population <- country_population %>%
rename(
`Density (P/Km2)` = `Density (P/Km²)`,
`Land Area (Km2)` = `Land Area (Km²)`
) %>%
mutate(
`Country (or dependency)` = tolower(`Country (or dependency)`),
`Population (2020)` = as.integer(
str_replace_all(.$`Population (2020)`, ",", "")
),
`Yearly Change` = as.double(
str_replace_all(.$`Yearly Change`, " %", "")
),
`Net Change` = as.integer(
str_replace_all(.$`Net Change`, ',', '')
),
`Density (P/Km2)` = as.integer(
str_replace_all(.$`Density (P/Km2)`, ',', '')
),
`Land Area (Km2)` = as.integer(
str_replace_all(.$`Land Area (Km2)`, ',', '')
),
`Migrants (net)` = as.integer(
str_replace_all(.$`Migrants (net)`, ',', '')
),
`Fert. Rate` = as.double(`Fert. Rate`),
`Med. Age` = as.integer(`Med. Age`),
`Urban Pop %` = as.integer(
str_replace_all(.$`Urban Pop %`, ' %', '')
),
`World Share` = as.double(
str_replace_all(.$`World Share`, ' %', '')
)
)
country_population
## # A tibble: 235 × 12
## `#` `Country (or dependency)` `Population (2…` `Yearly Change` `Net Change`
## <int> <chr> <int> <dbl> <int>
## 1 1 china 1439323776 0.39 5540090
## 2 2 india 1380004385 0.99 13586631
## 3 3 united states 331002651 0.59 1937734
## 4 4 indonesia 273523615 1.07 2898047
## 5 5 pakistan 220892340 2 4327022
## 6 6 brazil 212559417 0.72 1509890
## 7 7 nigeria 206139589 2.58 5175990
## 8 8 bangladesh 164689383 1.01 1643222
## 9 9 russia 145934462 0.04 62206
## 10 10 mexico 128932753 1.06 1357224
## # … with 225 more rows, and 7 more variables: `Density (P/Km2)` <int>,
## # `Land Area (Km2)` <int>, `Migrants (net)` <int>, `Fert. Rate` <dbl>,
## # `Med. Age` <int>, `Urban Pop %` <int>, `World Share` <dbl>
country_population <- country_population %>%
mutate(
`Country (or dependency)` = case_when(
`Country (or dependency)` == "united states" ~ "united states of america",
`Country (or dependency)` == 'serbia' ~ 'republic of serbia',
`Country (or dependency)` == 'tanzania' ~ 'united republic of tanzania',
`Country (or dependency)` == 'north macedonia' ~ 'macedonia',
`Country (or dependency)` == 'bahamas' ~ 'the bahamas',
`Country (or dependency)` == 'timor-leste' ~ 'east timor',
str_detect(`Country (or dependency)`, "ivoire") ~ 'ivory coast',
`Country (or dependency)` == 'dr congo' ~ 'democratic republic of the congo',
`Country (or dependency)` == 'congo' ~ 'republic of congo',
`Country (or dependency)` == 'czech republic (czechia)' ~ 'czech republic',
`Country (or dependency)` == 'guinea-bissau' ~ 'guinea bissau',
T ~ `Country (or dependency)`
)
)
data(country.regions)
setdiff(
country.regions$region,
country_population$`Country (or dependency)`
) %>%
as_tibble_col(column_name = "region")
## # A tibble: 5 × 1
## region
## <chr>
## 1 somaliland
## 2 swaziland
## 3 northern cyprus
## 4 antarctica
## 5 kosovo
setdiff(
country_population$`Country (or dependency)`,
country.regions$region
) %>%
as_tibble_col(column_name = "region")
## # A tibble: 69 × 1
## region
## <chr>
## 1 hong kong
## 2 singapore
## 3 state of palestine
## 4 puerto rico
## 5 bahrain
## 6 mauritius
## 7 eswatini
## 8 réunion
## 9 comoros
## 10 macao
## # … with 59 more rows
country_population %>%
mutate(region = `Country (or dependency)`, value = `Density (P/Km2)`) %>%
country_choropleth(title = 'World Population Density Map', num_colors = 9) +
theme(plot.title = element_text(hjust = 0.5))
census_api_key("44ed2f559fc8ff1593dc792e28e54bd655f7f900", install = T, overwrite = T)
## Your original .Renviron will be backed up and stored in your R HOME directory if needed.
## Your API key has been stored in your .Renviron and can be accessed by Sys.getenv("CENSUS_API_KEY").
## To use now, restart R or run `readRenviron("~/.Renviron")`
## [1] "44ed2f559fc8ff1593dc792e28e54bd655f7f900"
nj_median_household_income_and_rental <- get_acs(
geography = "tract",
variables = c(medincome = "B19013_001", medrental = "B25064_001"),
state = "NJ",
year = 2020,
geometry = T
) %>%
as_tibble() %>%
pivot_wider(
names_from = variable,
values_from = c(estimate, moe)
)
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
nj_median_household_income_and_rental
## # A tibble: 2,181 × 7
## GEOID NAME geometry estimate_medinc… estimate_medren…
## <chr> <chr> <MULTIPOLYGON [°]> <dbl> <dbl>
## 1 34021002200 Cens… (((-74.74945 40.22568, -… 54860 1121
## 2 34021004404 Cens… (((-74.50135 40.25484, -… 100282 1685
## 3 34021000600 Cens… (((-74.74294 40.21703, -… 66029 1180
## 4 34021001200 Cens… (((-74.81737 40.24094, -… 49659 1163
## 5 34025805700 Cens… (((-74.01226 40.30434, -… 57823 1199
## 6 34025809703 Cens… (((-74.29578 40.3226, -7… 130705 2262
## 7 34025807004 Cens… (((-74.00926 40.23071, -… 52440 1210
## 8 34025810300 Cens… (((-74.3717 40.29863, -7… 84091 1691
## 9 34025806504 Cens… (((-74.05513 40.25025, -… 72267 1200
## 10 34025802500 Cens… (((-74.23142 40.44554, -… 97232 1136
## # … with 2,171 more rows, and 2 more variables: moe_medincome <dbl>,
## # moe_medrental <dbl>
library(biscale)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
##
## stamp
nj_median_household_income_and_rental <- nj_median_household_income_and_rental %>%
bi_class(
x = estimate_medincome,
y = estimate_medrental,
style = "quantile",
dim = 3
)
nj_median_household_income_and_rental
## # A tibble: 2,181 × 8
## GEOID NAME geometry estimate_medinc… estimate_medren…
## <chr> <chr> <MULTIPOLYGON [°]> <dbl> <dbl>
## 1 34021002200 Cens… (((-74.74945 40.22568, -… 54860 1121
## 2 34021004404 Cens… (((-74.50135 40.25484, -… 100282 1685
## 3 34021000600 Cens… (((-74.74294 40.21703, -… 66029 1180
## 4 34021001200 Cens… (((-74.81737 40.24094, -… 49659 1163
## 5 34025805700 Cens… (((-74.01226 40.30434, -… 57823 1199
## 6 34025809703 Cens… (((-74.29578 40.3226, -7… 130705 2262
## 7 34025807004 Cens… (((-74.00926 40.23071, -… 52440 1210
## 8 34025810300 Cens… (((-74.3717 40.29863, -7… 84091 1691
## 9 34025806504 Cens… (((-74.05513 40.25025, -… 72267 1200
## 10 34025802500 Cens… (((-74.23142 40.44554, -… 97232 1136
## # … with 2,171 more rows, and 3 more variables: moe_medincome <dbl>,
## # moe_medrental <dbl>, bi_class <chr>
map <- nj_median_household_income_and_rental %>%
ggplot() +
geom_sf(
aes(fill = bi_class, geometry = geometry),
color = "white",
size = 0.1,
show.legend = F
) +
bi_scale_fill(pal = "DkBlue", dim = 3) +
labs(
x = NULL,
y = NULL,
title = "Tract-wise NJ Median Household Income Against Median Household Rental"
) +
# theme_void() +
theme(
plot.title = element_text(hjust = 0.5)
)
map
legend <- bi_legend(
pal = "DkBlue",
dim = 3,
xlab = "Median income",
ylab = "Median rental",
size = 8
)
legend
ggdraw() +
draw_plot(map, 0, 0, 1, 1) +
draw_plot(legend, 0.7, 0.15, 0.3, 0.3)
Based on the above graph, most counties in the center Jersey and north Jersey are blue which means that people in these areas have a high income. Meanwhile, some counties in the south Jersey are in purple which means that people lived there may have a relatively high rental.
lm <- nj_median_household_income_and_rental %>%
lm(estimate_medrental ~ estimate_medincome, .)
lm %>%
tidy() %>%
unnest()
## Warning: `cols` is now required when using unnest().
## Please use `cols = c()`
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 833. 20.0 41.8 3.54e-275
## 2 estimate_medincome 0.00756 0.000203 37.2 2.98e-231
lm %>%
glance
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.407 0.407 380. 1385. 2.98e-231 1 -14872. 29749. 29766.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
nj_median_household_income_and_rental %>%
add_predictions(lm) %>%
ggplot() +
geom_point(aes(estimate_medincome, estimate_medrental)) +
geom_line(aes(estimate_medincome, pred), color = 'red')
## Warning: Removed 160 rows containing missing values (geom_point).
## Warning: Removed 23 row(s) containing missing values (geom_path).
nls <- nj_median_household_income_and_rental %>%
lm(
estimate_medrental ~ log(estimate_medincome, 10),
.,
)
nls %>%
tidy %>%
unnest()
## Warning: `cols` is now required when using unnest().
## Please use `cols = c()`
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -5523. 197. -28.0 6.54e-146
## 2 log(estimate_medincome, 10) 1434. 40.2 35.6 2.55e-216
nls %>%
glance
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.386 0.386 387. 1271. 2.55e-216 1 -14906. 29818. 29835.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
nj_median_household_income_and_rental %>%
add_predictions(nls) %>%
ggplot() +
geom_point(aes(estimate_medincome, estimate_medrental)) +
geom_line(aes(estimate_medincome, pred), color = 'red')
## Warning: Removed 160 rows containing missing values (geom_point).
## Warning: Removed 23 row(s) containing missing values (geom_path).
nj_median_rental_2011_2015 <- get_acs(
geograph = "county",
variables = c(medrental = "B25064_001"),
state = "NJ",
year = 2015,
geometry = T
) %>%
as_tibble() %>%
rename(
`2011-2015 Rental` = estimate
)
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
nj_median_rental_2011_2015
## # A tibble: 21 × 6
## GEOID NAME variable `2011-2015 Ren…` moe geometry
## <chr> <chr> <chr> <dbl> <dbl> <MULTIPOLYGON [°]>
## 1 34007 Camden Count… medrent… 978 10 (((-75.13598 39.8932, -7…
## 2 34013 Essex County… medrent… 1068 8 (((-74.37623 40.76275, -…
## 3 34019 Hunterdon Co… medrent… 1328 36 (((-75.19511 40.57969, -…
## 4 34023 Middlesex Co… medrent… 1299 13 (((-74.63 40.34341, -74.…
## 5 34041 Warren Count… medrent… 1013 29 (((-75.20392 40.6915, -7…
## 6 34001 Atlantic Cou… medrent… 1047 21 (((-74.98522 39.5148, -7…
## 7 34015 Gloucester C… medrent… 1072 25 (((-75.4383 39.7844, -75…
## 8 34021 Mercer Count… medrent… 1132 14 (((-74.94228 40.34089, -…
## 9 34029 Ocean County… medrent… 1322 21 (((-74.55311 40.07913, -…
## 10 34031 Passaic Coun… medrent… 1184 12 (((-74.50288 41.08592, -…
## # … with 11 more rows
nj_median_rental_2016_2020 <- get_acs(
geograph = "county",
variables = c(medrental = "B25064_001"),
state = "NJ",
year = 2020,
geometry = T
) %>%
as_tibble() %>%
rename(
`2016-2020 Rental` = estimate
)
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
nj_median_rental_2016_2020
## # A tibble: 21 × 6
## GEOID NAME variable `2016-2020 Ren…` moe geometry
## <chr> <chr> <chr> <dbl> <dbl> <MULTIPOLYGON [°]>
## 1 34041 Warren Count… medrent… 1128 29 (((-75.20392 40.6915, -7…
## 2 34033 Salem County… medrent… 1024 41 (((-75.41272 39.38437, -…
## 3 34005 Burlington C… medrent… 1388 21 (((-75.05965 39.99215, -…
## 4 34021 Mercer Count… medrent… 1311 23 (((-74.94295 40.34164, -…
## 5 34025 Monmouth Cou… medrent… 1437 24 (((-74.61353 40.18267, -…
## 6 34017 Hudson Count… medrent… 1450 14 (((-74.0422 40.69997, -7…
## 7 34031 Passaic Coun… medrent… 1310 13 (((-74.50321 41.08587, -…
## 8 34027 Morris Count… medrent… 1622 30 (((-74.88923 40.78883, -…
## 9 34013 Essex County… medrent… 1211 11 (((-74.37623 40.76275, -…
## 10 34009 Cape May Cou… medrent… 1176 45 (((-74.97191 38.94058, -…
## # … with 11 more rows
nj_median_rental <- inner_join(
nj_median_rental_2011_2015,
nj_median_rental_2016_2020,
by = "GEOID"
) %>%
mutate(
`Percentage Change` = (`2016-2020 Rental` - `2011-2015 Rental`)/`2011-2015 Rental`,
County = str_match(NAME.x, "(?<county>\\S*) County*")[, "county"]
) %>%
select(
County,
`2011-2015 Rental`,
`2016-2020 Rental`,
`Percentage Change`
)
nj_median_rental
## # A tibble: 21 × 4
## County `2011-2015 Rental` `2016-2020 Rental` `Percentage Change`
## <chr> <dbl> <dbl> <dbl>
## 1 Camden 978 1107 0.132
## 2 Essex 1068 1211 0.134
## 3 Hunterdon 1328 1443 0.0866
## 4 Middlesex 1299 1495 0.151
## 5 Warren 1013 1128 0.114
## 6 Atlantic 1047 1129 0.0783
## 7 Gloucester 1072 1258 0.174
## 8 Mercer 1132 1311 0.158
## 9 Ocean 1322 1459 0.104
## 10 Passaic 1184 1310 0.106
## # … with 11 more rows
nj_median_rental %>%
ggplot(
aes(
x = reorder(County, -`Percentage Change`),
y = `Percentage Change`
)
) +
geom_bar(stat = "identity") +
labs(
x = "County",
y = "Percentage Change",
title = "The Percentage Change of Each County between 2011-2015 to 2016-2020"
) +
theme(plot.title = element_text(hjust = 0.5))